#!/bin/bash
#
# Launches the Hadoop Streaming job for step1_tweets_0_preprocess.
#
# The job reads $INPUT, runs mapper.py / reducer.py over it with
# $REDUCE_NUM reduce tasks, and writes the result to $OUTPUT. mylib.py is
# shipped to the tasks together with the two scripts.
#
# Exit status: that of the final `hadoop jar` command (it is the last
# command executed by this script).

# Directory holding the Hadoop Streaming jar.
STREAM=/home/hadoop/hadoop/hadoop_install/contrib/streaming

# Local scripts making up the job, plus the helper module shipped with them.
MAPPER=/home/hadoop/progs/python/projects/rt/step1_tweets_0_preprocess/mapper.py
REDUCER=/home/hadoop/progs/python/projects/rt/step1_tweets_0_preprocess/reducer.py
FILE1=/home/hadoop/progs/python/projects/rt/mylib.py

# Job input and output locations.
INPUT=/home/lih307/projects/rt/raw_tweets/twitter_tweets_0
OUTPUT=/home/lih307/projects/rt/step_1_tweets_0_unique

# Number of reduce tasks requested via mapred.reduce.tasks.
REDUCE_NUM=25

# Remove any output left over from a previous run so the job starts from a
# clean output path. The result is intentionally not checked: the command
# fails when the path does not exist yet, which is fine on a first run.
hadoop fs -rmr "$OUTPUT"

# Options are collected in an array and every expansion is quoted so that a
# path containing whitespace or glob characters still reaches hadoop as a
# single argument. The generic -D option stays first, ahead of the
# streaming-specific options, as in the original invocation.
#
# NOTE(review): -mapper/-reducer are given as absolute local paths while the
# same files are also shipped with -file; confirm those paths exist on the
# task nodes, otherwise the shipped basenames should be used instead.
job_args=(
    -D "mapred.reduce.tasks=$REDUCE_NUM"
    -input "$INPUT"
    -output "$OUTPUT"
    -mapper "$MAPPER"
    -reducer "$REDUCER"
    -file "$MAPPER"
    -file "$REDUCER"
    -file "$FILE1"
)

hadoop jar "$STREAM/hadoop-streaming-1.0.3.jar" "${job_args[@]}"